###############################################################################################################################################################################
#1. INTRODUCTION
#title: Analysis code (pre-processing) for Supran, Rahmstorf, and Oreskes (2022) 'Assessing ExxonMobil’s global warming projections'
#author: Geoffrey Supran
#date: 3 June 2022
#output(s): .rda in dir = 'Data/'
#description: This is the code used to load and pre-process data analyzed and reported in Supran, Rahmstorf, and Oreskes (2022).
#As described in Supran, Rahmstorf, and Oreskes (2022), we identified all documents that reported climate model outputs of: (i) a time series of projected future global mean surface temperatures (GMSTs); and (ii) future external radiative forcings due to greenhouse gases.
#This yielded 12 documents published between 1977-2003, which contain 16 distinct temperature projections presented in the form of 12 unique graphs and one table. 
#Once identified, all original GMST and forcing projections were converted for analysis: graphs were digitized using the free online application WebPlotDigitizer (v.4.4), and tables were extracted to spreadsheets. All resulting raw data are provided in the spreadsheet 'RawData_projected.xlsx' in our online depository (dir = 'Raw data/').
#We compare these projected temperature changes to five historical observed time series: Hadley/UEA HadCRUT4; National Oceanic and Atmospheric Administration (NOAA) GlobalTemp; National Aeronautics and Space Administration (NASA) GISTEMP; Berkeley Earth; and Cowtan and Way (2014). This raw data is provided in the spreadsheet 'RawData_historical.xlsx' in our online depository (dir = 'Raw data/').
#All projected and historical data are pre-processed into a standardized format using the following script run in R, then output to dir = 'Data/'.
###############################################################################################################################################################################
#2. SETUP
library(readxl)
library(ggplot2)
library(egg)
library(tidyr)
library(forecast)
library(ggrepel)
library(boot)
setwd("###/")
###############################################################################################################################################################################
#3. MAIN CODE: Load Projection data
dir = 'Raw data/'
offsets = data.frame()
#--------------------------------------------------------------------------------------------------------------------
#INTERNAL DOCUMENTS
#--------------------------------------------------------------------------------------------------------------------
###1977 - Black###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "1977 - Black")
df = df[-1,-c(1:3,6,9,12,15)]
colnames(df) = c('year','dT','year_high','dT_high','year_low','dT_low','year_CO2','CO2')

df2 = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "1979 - Mastracchio")
df2 = df2[-1,c(17:18)]
colnames(df2) = c('year_CO2','CO2')

xmin = ceiling(as.numeric(min(df$year, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year, na.rm = TRUE))) #Find latest year, rounding down
xmin_high = ceiling(as.numeric(min(df$year_high, na.rm = TRUE))) #Find earliest year, rounding up
xmax_high = floor(as.numeric(max(df$year_high, na.rm = TRUE))) #Find latest year, rounding down
xmin_low = ceiling(as.numeric(min(df$year_low, na.rm = TRUE))) #Find earliest year, rounding up
xmax_low = floor(as.numeric(max(df$year_low, na.rm = TRUE))) #Find latest year, rounding up

interp = data.frame(approx(df$year, df$dT, xout = seq(xmin,xmax,1)))
interp_high = data.frame(approx(df$year_high, df$dT_high, xout = seq(xmin_high,xmax_high,1)))
interp_low = data.frame(approx(df$year_low, df$dT_low, xout = seq(xmin_low,xmax_low,1)))
interp_CO2 = data.frame(approx(df2$year_CO2, df2$CO2, xout = seq(xmin,xmax,1)))

ind_high = match(interp$x, interp_high$x)
interp$high = interp_high[ind_high,2]
ind_low = match(interp$x, interp_low$x)
interp$low = interp_low[ind_low,2]
ind_CO2 = match(interp$x, as.numeric(interp_CO2$x))
interp$CO2 = as.numeric(interp_CO2[ind_CO2,2])

interp$sd_high = interp$high - interp$y
interp$sd_low = interp$y - interp$low

interp$CO2scen = 'nominal'
df = interp[,c(1,2,6,7,5,8)]
df$cat = '1977 Black; 1979 Mastracchio'
colnames(df)[1:2] = c('year','dT')
#--------------------------------------------------------------------------------------------------------------------
proj = df
startyear = 1977
startyears = data.frame(cat = df$cat[1], start = startyear)

offsets_temp = mean(df[1:which(df$year==1900),2]) #Calculate mean value of pre-industrial temp range from first data point through 1900
offsets_temp2 = data.frame(cat = df$cat[1], Tpreind1 = df[1,1], Tpreind2 = df[which(df$year==1900),1], offsets_temp) #Save mean offset temp for time range of Tpreind1 through Tpreind2
offsets = rbind(offsets, offsets_temp2)
#--------------------------------------------------------------------------------------------------------------------
###1980 - Shaw###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "1980 - Shaw")
df = df[-1,-c(1:3,6,9,12:ncol(df))]
colnames(df) = c('year_high','dT_high','year_low','dT_low','year_CO2','CO2')

xmin_high = ceiling(as.numeric(min(df$year_high, na.rm = TRUE))) #Find earliest year, rounding up
xmax_high = floor(as.numeric(max(df$year_high, na.rm = TRUE))) #Find latest year, rounding down
xmin_low = ceiling(as.numeric(min(df$year_low, na.rm = TRUE))) #Find earliest year, rounding up
xmax_low = floor(as.numeric(max(df$year_low, na.rm = TRUE))) #Find latest year, rounding down

interp_high = data.frame(approx(df$year_high, df$dT_high, xout = seq(xmin_high,xmax_high,1)))
interp_low = data.frame(approx(df$year_low, df$dT_low, xout = seq(xmin_low,xmax_low,1)))
interp_CO2 = data.frame(df[,5:6])

mid_high = interp_high[interp_high$x >= 2026,]
mid_low = interp_low[interp_low$x >= 2026 & interp_low$x <= 2078,]
mid_med = mid_low + (mid_high - mid_low)/2
ind_high = match(mid_med$x, mid_high$x)
mid_med$high = mid_high[ind_high,2]
ind_low = match(mid_med$x, mid_low$x)
mid_med$low = mid_low[ind_low,2]
mid_med$sd_high = mid_med$high - mid_med$y
mid_med$sd_low = mid_med$y - mid_med$low
mid_med = mid_med[,-c(3:4)]

end_low = interp_low[interp_low$x > 2078,]
end_med = end_low
end_med$y = end_low$y + (mid_med[nrow(mid_med),2] - mid_low[nrow(mid_low),2])
ind_low = match(end_med$x, end_low$x)
end_med$low = end_low[ind_low,2]
end_med$sd_low = end_med$y - end_med$low
end_med$sd_high = end_med$sd_low
end_med = end_med[,c(1,2,5,4)]

hist = interp_high[interp_high$x <= 1979,]
hist$sd_high = as.numeric(0)
hist$sd_low = as.numeric(0)
hist$sd_high[nrow(hist)] = interp_high[interp_high$x==1980,2] - hist[nrow(hist),2]
hist$sd_low[nrow(hist)] = hist$sd_high[nrow(hist)]

dfb = rbind(hist, mid_med, end_med)

dfb$cat = '1980 Shaw; 1982 Glaser (Figure9)'
colnames(dfb)[1:2] = c('year','dT')

dfb_interp = data.frame(approx(dfb$year, dfb$dT, xout = seq(dfb[1,1],dfb[nrow(dfb),1],1))) #We use dfb for plotting. We construct dfb_interp in order to have interpolated data over 2000s, enabling us to calculate TC through 2019.
colnames(dfb_interp) = c('year','dT')
ind = match(dfb_interp$year, dfb$year)
dfb_interp$sd_high = dfb[ind,3]
dfb_interp$sd_low = dfb[ind,4]
dfb_interp$cat = dfb[ind,5]

#Here, we calculate sd_high and sd_low values for intervening years (1979-2025). To do so, we imagine a straight line connecting known temp values at 1979 and 2026. At 1979, it is the historical value on the graph. At 2026, it is the upper and lower bounds defined by the graph's uncertainty range. We therefore compute two gradients: one between 1979 and the upper value at 2026; and one between 1979 and the lower value at 2026.
#These gradients define upper and lower slopes. We then compute difference between each slope and the centre line of dfb_interp, yielding sd_high and sd_low.
temp = dfb_interp
temp$dT_high = temp$dT
temp$dT_low = temp$dT
temp$dT_high[which(temp$year == 2026)] = temp$dT_high[which(temp$year == 2026)] + temp$sd_high[which(temp$year == 2026)]
gradient_high = (temp$dT_high[which(temp$year == 2026)] - temp$dT_high[which(temp$year == 1979)])/(2026-1979)
for(i in seq(1980, 2025, 1)){
  temp$dT_high[which(temp$year == i)] = temp$dT_high[which(temp$year == 1979)] + gradient_high*(i - 1979) #Create upper line as centre line plus gradient*intervening years
}
temp$dT_low[which(temp$year == 2026)] = temp$dT_low[which(temp$year == 2026)] - temp$sd_high[which(temp$year == 2026)]
gradient_low = (temp$dT_low[which(temp$year == 2026)] - temp$dT_low[which(temp$year == 1979)])/(2026-1979)
for(i in seq(1980, 2025, 1)){
  temp$dT_low[which(temp$year == i)] = temp$dT_low[which(temp$year == 1979)] + gradient_low*(i - 1979)  #Create lower line as centre line plus gradient*intervening years
}
colnames(temp) = c('year_temp','dT_temp','sd_high_temp','sd_low_temp','cat_temp','dT_high','dT_low')

#Calculate sd_high and sd_low as the difference between upper/lower curves and the centre line.
dfb_interp2 = cbind(dfb_interp,temp)
dfb_interp2$sd_high[which(dfb_interp2$year>=1979 & dfb_interp2$year<2026)] = dfb_interp2$dT_high[which(dfb_interp2$year>=1979 & dfb_interp2$year<2026)] - dfb_interp2$dT[which(dfb_interp2$year>=1979 & dfb_interp2$year<2026)]
dfb_interp2$sd_low[which(dfb_interp2$year>=1979 & dfb_interp2$year<2026)] = dfb_interp2$dT[which(dfb_interp2$year>=1979 & dfb_interp2$year<2026)] - dfb_interp2$dT_low[which(dfb_interp2$year>=1979 & dfb_interp2$year<2026)]
dfb_interp2$cat[which(dfb_interp2$year>1979 & dfb_interp2$year<2026)] = dfb_interp2$cat[which(dfb_interp2$year==1979)]
dfb_interp2 = dfb_interp2[,c(1:5)]

ind_CO2 = match(dfb_interp2$year, as.numeric(interp_CO2$year_CO2))
dfb_interp2$CO2 = as.numeric(interp_CO2[ind_CO2,2])

dfb_interp2$CO2scen = 'nominal'
dfb_interp2 = dfb_interp2[,c(1:4,6:7,5)]
df = dfb_interp2
#--------------------------------------------------------------------------------------------------------------------
proj = rbind(proj, df)
startyear = 1980
startyears = rbind(startyears, data.frame(cat = df$cat[1], start = startyear))

offsets_temp = mean(df[1:which(df$year==1900),2]) #Calculate mean value of pre-industrial temp range from first data point through 1900
offsets_temp2 = data.frame(cat = df$cat[1], Tpreind1 = df[1,1], Tpreind2 = df[which(df$year==1900),1], offsets_temp) #Save mean offset temp for time range of Tpreind1 through Tpreind2
offsets = rbind(offsets, offsets_temp2)
#--------------------------------------------------------------------------------------------------------------------
###1982 - Glaser (figure 3)###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "1982 - Glaser")
df = df[-1,-c(1:3,6,9:28)]
colnames(df) = c('year','dT','year2','dT2','year_CO2','CO2')
df = df[,-c(3:4)]

xmin = ceiling(as.numeric(min(df$year, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year, na.rm = TRUE))) #Find latest year, rounding down
interp = data.frame(approx(df$year, df$dT, xout = seq(xmin,xmax,1)))

interp$sd_high = NA
interp$sd_low = NA
interp$cat = '1982 Glaser (Figure 3); 1984 Shaw'
colnames(interp) = c('year','dT','sd_high','sd_low','cat')
dfb = interp

#Interpolate CO2 values then match to years corresponding to dT values
xmin = ceiling(as.numeric(min(df$year_CO2, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year_CO2, na.rm = TRUE))) #Find latest year, rounding down
interp = data.frame(approx(df$year_CO2, df$CO2, xout = seq(xmin,xmax,1)))
ind_CO2 = match(dfb$year, as.numeric(interp$x))
dfb$CO2 = as.numeric(interp[ind_CO2,2])

dfb$CO2scen = 'nominal'
dfb = dfb[,c(1:4,6,7,5)]
df = dfb
#--------------------------------------------------------------------------------------------------------------------
proj = rbind(proj, df)
startyear = 1982
startyears = rbind(startyears, data.frame(cat = df$cat[1], start = startyear))

offsets_temp = df[1,2] #Normally we calculate mean value of pre-industrial temp range over some preindustrial period. Here, however, no preindustrial temp data plotted. Therefore just zero to the first year.
offsets_temp2 = data.frame(cat = df$cat[1], Tpreind1 = df[1,1], Tpreind2 = df[1,1], offsets_temp) #Save offset temp for time range of Tpreind1 through Tpreind2 (which, in this case, are both the first and only year of data used)
offsets = rbind(offsets, offsets_temp2)
#--------------------------------------------------------------------------------------------------------------------
###1982 - Weinberg###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "1982 - Weinberg")
df = df[-1,-c(1:3,6,9,12)]
colnames(df) = c('year','dT','year_low','dT_low','year_high','dT_high','year_CO2','CO2')

xmin = ceiling(as.numeric(min(df$year, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year, na.rm = TRUE))) #Find latest year, rounding down
xmin_high = ceiling(as.numeric(min(df$year_high, na.rm = TRUE))) #Find earliest year, rounding up
xmax_high = floor(as.numeric(max(df$year_high, na.rm = TRUE))) #Find latest year, rounding down
xmin_low = ceiling(as.numeric(min(df$year_low, na.rm = TRUE))) #Find earliest year, rounding up
xmax_low = floor(as.numeric(max(df$year_low, na.rm = TRUE))) #Find latest year, rounding down

interp = data.frame(approx(df$year, df$dT, xout = seq(xmin,xmax,1)))
interp_high = data.frame(approx(df$year_high, df$dT_high, xout = seq(xmin_high,xmax_high,1)))
interp_low = data.frame(approx(df$year_low, df$dT_low, xout = seq(xmin_low,xmax_low,1)))

ind_high = match(interp$x, interp_high$x)
interp$high = interp_high[ind_high,2]
ind_low = match(interp$x, interp_low$x)
interp$low = interp_low[ind_low,2]

interp$sd_high = interp$high - interp$y
interp$sd_low = interp$y - interp$low

dfb = interp
colnames(dfb)[1:2] = c('year','dT')

#Match CO2 values (already interpolated by Hausfather  et al. (2020)) to years corresponding to dT values
interp = data.frame(df[,c(7:8)])
ind_CO2 = match(dfb$year, as.numeric(interp$year_CO2))
dfb$CO2 = as.numeric(interp[ind_CO2,2])

dfb$CO2scen = 'nominal'
dfb$cat = '1982 Weinberg; 1984 Callegari'
dfb = dfb[,c(1,2,5:9)]
df = dfb
#--------------------------------------------------------------------------------------------------------------------
proj = rbind(proj, df)
startyear = 1982
startyears = rbind(startyears, data.frame(cat = df$cat[1], start = startyear))

offsets_temp = mean(df[1:which(df$year==1970),2]) #Calculate mean value of pre-industrial temp range from first data point through 1970
offsets_temp2 = data.frame(cat = df$cat[1], Tpreind1 = df[1,1], Tpreind2 = df[which(df$year==1970),1], offsets_temp) #Save mean offset temp for time range of Tpreind1 through Tpreind2
offsets = rbind(offsets, offsets_temp2)
#--------------------------------------------------------------------------------------------------------------------
###1985 - Flannery (page 23)###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "1985 - Flannery")
df = df[-1,c(13:14,32:33)]
colnames(df) = c('year','dT','year_CO2_nominal','CO2_nominal')

xmin = ceiling(as.numeric(min(df$year, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year, na.rm = TRUE))) #Find latest year, rounding down
interp = data.frame(approx(df$year, df$dT, xout = seq(xmin,xmax,1)))

interp$sd_high = NA
interp$sd_low = NA
interp$cat = '1985 Flannery (Page 23)'
colnames(interp) = c('year','dT','sd_high','sd_low','cat')
dfb = interp

#Interpolate CO2 values then match to years corresponding to dT values
xmin = ceiling(as.numeric(min(df$year_CO2_nominal, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year_CO2_nominal, na.rm = TRUE))) #Find latest year, rounding down
interp = data.frame(approx(df$year_CO2_nominal, df$CO2_nominal, xout = seq(xmin,xmax,1)))
ind_CO2 = match(dfb$year, as.numeric(interp$x))
dfb$CO2 = as.numeric(interp[ind_CO2,2])

dfb$CO2scen = 'nominal'
dfb = dfb[,c(1:4,6,5,7)]
df = dfb
#--------------------------------------------------------------------------------------------------------------------
proj = rbind(proj, df)
startyear = 1985
startyears = rbind(startyears, data.frame(cat = df$cat[1], start = startyear))

offsets_temp = df[1,2] #Normally we calculate mean value of pre-industrial temp range over some preindustrial period. Here, however, no preindustrial temp data plotted: 1850 is treated as the zero value. Therefore just zero to the first year.
offsets_temp2 = data.frame(cat = df$cat[1], Tpreind1 = df[1,1], Tpreind2 = df[1,1], offsets_temp) #Save offset temp for time range of Tpreind1 through Tpreind2 (which, in this case, are both the first and only year of data used)
offsets = rbind(offsets, offsets_temp2)
#--------------------------------------------------------------------------------------------------------------------
###1985 - Flannery (page 24)###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "1985 - Flannery")
df = df[-1,c(16:17,19:20,22:23,32:33,35:36,38:39)]
colnames(df) = c('year','dT','year_high','dT_high','year_low','dT_low','year_CO2_nominal','CO2_nominal','year_CO2_high','CO2_high','year_CO2_low','CO2_low')

xmin = ceiling(as.numeric(min(df$year, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year, na.rm = TRUE))) #Find latest year, rounding down
xmin_high = ceiling(as.numeric(min(df$year_high, na.rm = TRUE))) #Find earliest year, rounding up
xmax_high = floor(as.numeric(max(df$year_high, na.rm = TRUE))) #Find latest year, rounding down
xmin_low = ceiling(as.numeric(min(df$year_low, na.rm = TRUE))) #Find earliest year, rounding up
xmax_low = floor(as.numeric(max(df$year_low, na.rm = TRUE))) #Find latest year, rounding down

interp = data.frame(approx(df$year, df$dT, xout = seq(xmin,xmax,1)))
interp_high = data.frame(approx(df$year_high, df$dT_high, xout = seq(xmin_high,xmax_high,1)))
interp_low = data.frame(approx(df$year_low, df$dT_low, xout = seq(xmin_low,xmax_low,1)))

ind_high = match(interp$x, interp_high$x)
interp$high = interp_high[ind_high,2]
ind_low = match(interp$x, interp_low$x)
interp$low = interp_low[ind_low,2]

interp_temp1 = interp[,c(1,2)]
colnames(interp_temp1) = c('year','dT')
interp_temp1$CO2scen = 'nominal'

interp_temp2 = interp[,c(1,3)]
colnames(interp_temp2) = c('year','dT')
interp_temp2$CO2scen = 'high'

interp_temp3 = interp[,c(1,4)]
colnames(interp_temp3) = c('year','dT')
interp_temp3$CO2scen = 'low'

interp2 = rbind(interp_temp1, interp_temp2, interp_temp3)

interp2$sd_high = NA
interp2$sd_low = NA

dfb = interp2
dfb = dfb[,c(1,2,4,5,3)]
dfb$cat = '1985 Flannery (Page 24)'

#Interpolate CO2 values
xmin = ceiling(as.numeric(min(df$year_CO2_nominal, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year_CO2_nominal, na.rm = TRUE))) #Find latest year, rounding down
interp_nominal = data.frame(approx(df$year_CO2_nominal, df$CO2_nominal, xout = seq(xmin,xmax,1)))
interp_nominal$CO2scen = 'nominal'
xmin = ceiling(as.numeric(min(df$year_CO2_high, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year_CO2_high, na.rm = TRUE))) #Find latest year, rounding down
interp_high = data.frame(approx(df$year_CO2_high, df$CO2_high, xout = seq(xmin,xmax,1)))
interp_high$CO2scen = 'high'
xmin = ceiling(as.numeric(min(df$year_CO2_low, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year_CO2_low, na.rm = TRUE))) #Find latest year, rounding down
interp_low = data.frame(approx(df$year_CO2_low, df$CO2_low, xout = seq(xmin,xmax,1)))
interp_low$CO2scen = 'low'
interp = rbind(interp_nominal, interp_high, interp_low)

dfc = data.frame()
scens = unique(dfb$CO2scen)
for(i in 1:length(scens)){
  dfb_temp = dfb[dfb$CO2scen == scens[i],]
  if(i==1){
    offsets_temp = dfb_temp[1,2] #Normally we calculate mean value of pre-industrial temp range over some preindustrial period. Here, however, no preindustrial temp data plotted: 1850 is treated as the zero value. Therefore just zero to the first year.
    offsets_temp2 = data.frame(cat = dfb_temp$cat[1], Tpreind1 = dfb_temp[1,1], Tpreind2 = dfb_temp[1,1], offsets_temp) #Save offset temp for time range of Tpreind1 through Tpreind2 (which, in this case, are both the first and only year of data used)
    offsets = rbind(offsets, offsets_temp2)
  }
  
  #Match CO2 values to years corresponding to dT values
  interp_temp = interp[interp$CO2scen == scens[i],]
  ind_CO2 = match(dfb_temp$year, as.numeric(interp_temp$x))
  dfb_temp$CO2 = as.numeric(interp_temp[ind_CO2,2])
  
  dfc = rbind(dfc, dfb_temp)
}
dfc = dfc[,c(1:4,7,5:6)]
df = dfc
#--------------------------------------------------------------------------------------------------------------------
proj = rbind(proj, df)
startyear = 1985
startyears = rbind(startyears, data.frame(cat = df$cat[1], start = startyear))
#--------------------------------------------------------------------------------------------------------------------
#PEER-REVIEWED DOCUMENTS
#--------------------------------------------------------------------------------------------------------------------
###1985 - Hoffert (figure 5.16)###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "1985 - Hoffert")
df = df[-1,c(13:14,19:20,22:23,25:26,28:29,31:34)]
colnames(df) = c('year_high','dT_high','year_low','dT_low','year_nominal_lambda1.1','dT_nominal_lambda1.1','year_nominal_lambda2.2','dT_nominal_lambda2.2','year_nominal_lambda4.4','dT_nominal_lambda4.4','year_CO2','CO2_high','CO2_nominal','CO2_low')

xmin_lambda2.2 = ceiling(as.numeric(min(df$'year_nominal_lambda2.2', na.rm = TRUE))) #Find earliest year, rounding up
xmax_lambda2.2 = floor(as.numeric(max(df$'year_nominal_lambda2.2', na.rm = TRUE))) #Find latest year, rounding down
xmin_lambda4.4 = ceiling(as.numeric(min(df$'year_nominal_lambda4.4', na.rm = TRUE))) #Find earliest year, rounding up
xmax_lambda4.4 = floor(as.numeric(max(df$'year_nominal_lambda4.4', na.rm = TRUE))) #Find latest year, rounding down
xmin_lambda1.1 = ceiling(as.numeric(min(df$'year_nominal_lambda1.1', na.rm = TRUE))) #Find earliest year, rounding up
xmax_lambda1.1 = floor(as.numeric(max(df$'year_nominal_lambda1.1', na.rm = TRUE))) #Find latest year, rounding down
xmin_high = ceiling(as.numeric(min(df$year_high, na.rm = TRUE))) #Find earliest year, rounding up
xmax_high = floor(as.numeric(max(df$year_high, na.rm = TRUE))) #Find latest year, rounding down
xmin_low = ceiling(as.numeric(min(df$year_low, na.rm = TRUE))) #Find earliest year, rounding up
xmax_low = floor(as.numeric(max(df$year_low, na.rm = TRUE))) #Find latest year, rounding down

interp_lambda2.2 = data.frame(approx(df$'year_nominal_lambda2.2', df$'dT_nominal_lambda2.2', xout = seq(xmin_lambda2.2,xmax_lambda2.2,1)))
interp_lambda4.4 = data.frame(approx(df$'year_nominal_lambda4.4', df$'dT_nominal_lambda4.4', xout = seq(xmin_lambda4.4,xmax_lambda4.4,1)))
interp_lambda1.1 = data.frame(approx(df$'year_nominal_lambda1.1', df$'dT_nominal_lambda1.1', xout = seq(xmin_lambda1.1,xmax_lambda1.1,1)))
interp_high = data.frame(approx(df$year_high, df$dT_high, xout = seq(xmin_high,xmax_high,1)))
interp_low = data.frame(approx(df$year_low, df$dT_low, xout = seq(xmin_low,xmax_low,1)))

#Correct interp_high from absolute temperatures to changes in temperature
temp = data.frame(y = diff(interp_high$y))
temp2 = rbind(data.frame(y = 0), cumsum(temp))
interp_high2 = cbind(x=interp_high$x, temp2)

#Correct interp_low from absolute temperatures to changes in temperature
temp = data.frame(y = diff(interp_low$y))
temp2 = rbind(data.frame(y = 0), cumsum(temp))
interp_low2 = cbind(x=interp_low$x, temp2)

ind_high = match(interp_lambda2.2$x, interp_high2$x)
interp_lambda2.2$high = interp_high2[ind_high,2]
ind_low = match(interp_lambda2.2$x, interp_low2$x)
interp_lambda2.2$low = interp_low2[ind_low,2]
ind_lambda4.4 = match(interp_lambda2.2$x, interp_lambda4.4$x)
interp_lambda2.2$lambda4.4 = interp_lambda4.4[ind_lambda4.4,2]
ind_lambda1.1 = match(interp_lambda2.2$x, interp_lambda1.1$x)
interp_lambda2.2$lambda1.1 = interp_lambda1.1[ind_lambda1.1,2]

#Create error bars by calculating temperature differences between lambda = 2.2 W/m2/K scenario and higher and lower lambda scenarios.
interp_lambda2.2$sd_low = interp_lambda2.2$y - interp_lambda2.2$lambda4.4
interp_lambda2.2$sd_high = interp_lambda2.2$lambda1.1 - interp_lambda2.2$y

interp_temp1 = interp_lambda2.2[,c(1,2,7,8)]
colnames(interp_temp1) = c('year','dT','sd_high','sd_low')
interp_temp1$CO2scen = 'nominal'

interp_temp2 = interp_lambda2.2[,c(1,3)]
colnames(interp_temp2) = c('year','dT')
interp_temp2$sd_high = as.numeric(NA)
interp_temp2$sd_low = as.numeric(NA)
interp_temp2$CO2scen = 'high'

interp_temp3 = interp_lambda2.2[,c(1,4)]
colnames(interp_temp3) = c('year','dT')
interp_temp3$sd_high = as.numeric(NA)
interp_temp3$sd_low = as.numeric(NA)
interp_temp3$CO2scen = 'low'

interp2 = rbind(interp_temp1, interp_temp2, interp_temp3)

dfb = interp2
dfb$cat = '1985 Hoffert (fig. 5.16)'

#Interpolate CO2 values (in this case, no actual interpolation is needed as CO2 values are for the same range of integer years)
interp_nominal = data.frame(df$year_CO2, df$CO2_nominal)
colnames(interp_nominal) = c('year','CO2')
interp_nominal$CO2scen = 'nominal'
interp_high = data.frame(df$year_CO2, df$CO2_high)
colnames(interp_high) = c('year','CO2')
interp_high$CO2scen = 'high'
interp_low = data.frame(df$year_CO2, df$CO2_low)
colnames(interp_low) = c('year','CO2')
interp_low$CO2scen = 'low'
interp = rbind(interp_nominal, interp_high, interp_low)

dfc = data.frame()
scens = unique(dfb$CO2scen)
for(i in 1:length(scens)){
  dfb_temp = dfb[dfb$CO2scen == scens[i],]
  if(i==1){
    offsets_temp = dfb_temp[1,2] #Normally we calculate mean value of pre-industrial temp range over some preindustrial period. Here, however, no preindustrial temp data plotted: ~1850 is treated as the zero value. Therefore just zero to the first year.
    offsets_temp2 = data.frame(cat = dfb_temp$cat[1], Tpreind1 = dfb_temp[1,1], Tpreind2 = dfb_temp[1,1], offsets_temp) #Save offset temp for time range of Tpreind1 through Tpreind2 (which, in this case, are both the first and only year of data used)
    offsets = rbind(offsets, offsets_temp2)
  }
  
  #Match CO2 values to years corresponding to dT values
  interp_temp = interp[interp$CO2scen == scens[i],]
  ind_CO2 = match(dfb_temp$year, as.numeric(interp_temp$year))
  dfb_temp$CO2 = as.numeric(interp_temp[ind_CO2,2])
  
  dfc = rbind(dfc, dfb_temp)
}
dfc = dfc[,c(1:4,7,5:6)]
df = dfc
#--------------------------------------------------------------------------------------------------------------------
proj = rbind(proj, df)
startyear = 1985
startyears = rbind(startyears, data.frame(cat = df$cat[1], start = startyear))
#--------------------------------------------------------------------------------------------------------------------
###1994 - Jain###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "1994 - Jain")
df = df[-1,c(4:5,7:8,10:11)]
colnames(df) = c('year_high','dT_high','year_low','dT_low','year_CO2','CO2')

xmin_high = ceiling(as.numeric(min(df$year_high, na.rm = TRUE))) #Find earliest year, rounding up
xmax_high = floor(as.numeric(max(df$year_high, na.rm = TRUE))) #Find latest year, rounding down
xmin_low = ceiling(as.numeric(min(df$year_low, na.rm = TRUE))) #Find earliest year, rounding up
xmax_low = floor(as.numeric(max(df$year_low, na.rm = TRUE))) #Find latest year, rounding down

interp_high = data.frame(approx(df$year_high, df$dT_high, xout = seq(xmin_high,xmax_high,1)))
interp_low = data.frame(approx(df$year_low, df$dT_low, xout = seq(xmin_low,xmax_low,1)))

#There is no 'nominal' scenario data in this dataset, but we calculate one as the halfway point between low and high values.
ind_high = match(interp_low$x, interp_high$x)
interp_low$high = interp_high[ind_high,2]
colnames(interp_low) = c('x','low','high')
interp_low$y = interp_low$low + (interp_low$high - interp_low$low)/2 #Calculate 'nominal' scenario as halfwaypoint between low and high values.
interp_low = interp_low[,c(1,4,2,3)]
interp = interp_low

#Construct error bars by calculating temperature differences between nominal scenario and low/high values.
interp$sd_low = interp$y - interp$low
interp$sd_high = interp$high - interp$y

interp_temp1 = interp[,c(1,2,6,5)]
colnames(interp_temp1) = c('year','dT','sd_high','sd_low')
interp_temp1$CO2scen = 'nominal'

dfb = interp_temp1
dfb$cat = '1994 Jain'

#Interpolate CO2 values then match to years corresponding to dT values
xmin = ceiling(as.numeric(min(df$year_CO2, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year_CO2, na.rm = TRUE))) #Find latest year, rounding down
interp = data.frame(approx(df$year_CO2, df$CO2, xout = seq(xmin,xmax,1)))
ind_CO2 = match(dfb$year, as.numeric(interp$x))
dfb$CO2 = as.numeric(interp[ind_CO2,2])

dfb$CO2scen = 'nominal'
dfb = dfb[,c(1:4,7,5:6)]
df = dfb
#--------------------------------------------------------------------------------------------------------------------
proj = rbind(proj, df)
startyear = 1994
startyears = rbind(startyears, data.frame(cat = df$cat[1], start = startyear))

offsets_temp = df[1,2] #Normally we calculate mean value of pre-industrial temp range over some preindustrial period. Here, however, no preindustrial temp data plotted: 1850 is treated as the starting value. Therefore just zero to the first year.
offsets_temp2 = data.frame(cat = df$cat[1], Tpreind1 = df[1,1], Tpreind2 = df[1,1], offsets_temp) #Save offset temp for time range of Tpreind1 through Tpreind2 (which, in this case, are both the first and only year of data used)
offsets = rbind(offsets, offsets_temp2)
#--------------------------------------------------------------------------------------------------------------------
###1997 - Kheshgi###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "1997 - Kheshgi")
df = df[-1,c(4:5,24)]
colnames(df) = c('year','dT','CO2')
df[,c(1:3)] = sapply(df[,c(1:3)],as.numeric)

df$sd_high = as.numeric(NA)
df$sd_low = as.numeric(NA)
df$CO2scen = 'nominal'
df$cat = '1997 Kheshgi'

df = data.frame(df[,c(1,2,4,5,3,6,7)])
#--------------------------------------------------------------------------------------------------------------------
proj = rbind(proj, df)
startyear = 1997
startyears = rbind(startyears, data.frame(cat = df$cat[1], start = startyear))

offsets_temp = df[1,2] #Normally we calculate mean value of pre-industrial temp range over some preindustrial period. Here, however, no preindustrial temp data plotted: 1995 is treated as the starting value. Therefore just zero to the first year.
offsets_temp2 = data.frame(cat = df$cat[1], Tpreind1 = df[1,1], Tpreind2 = df[1,1], offsets_temp) #Save offset temp for time range of Tpreind1 through Tpreind2 (which, in this case, are both the first and only year of data used)
offsets = rbind(offsets, offsets_temp2)
#--------------------------------------------------------------------------------------------------------------------
###2001 - Albritton###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "2001 - Albritton - v2")
df = df[-c(1:2),c(4:7,9:10)]
colnames(df) = c('year','dT','dT_low','dT_high','year_F','F')
df = data.frame(sapply(df, as.numeric))

dfb = df[,c(1:4)]

#Construct error bars by calculating temperature differences between average scenario and max/min values.
dfb$sd_high = dfb$dT_high - dfb$dT
dfb$sd_low = dfb$dT - dfb$dT_low
dfb = dfb[,-c(3:4)]

#Interpolate CO2 (actually, forcing) values then match to years corresponding to dT values
dfc = df[c(1:12),c(5:6)]
interp = data.frame(approx(dfc$year_F, dfc$F, xout = seq(dfc[1,1],dfc[nrow(dfc),1],1)))
ind_CO2 = match(dfb$year, as.numeric(interp$x))
dfb$CO2 = as.numeric(interp[ind_CO2,2])

dfb$CO2scen = 'nominal'
dfb$cat = '2001 Albritton'
df = dfb
#--------------------------------------------------------------------------------------------------------------------
proj = rbind(proj, df)
startyear = 2001
startyears = rbind(startyears, data.frame(cat = df$cat[1], start = startyear))

offsets_temp = 0 #Normally we calculate mean value of pre-industrial temp range over some preindustrial period. Here, however, no preindustrial temp data plotted: 1990 is treated as the starting value. Therefore just zero to that year.
offsets_temp2 = data.frame(cat = df$cat[1], Tpreind1 = 1990, Tpreind2 = 1990, offsets_temp) #Save mean offset temp for time range of Tpreind1 through Tpreind2
offsets = rbind(offsets, offsets_temp2)
#--------------------------------------------------------------------------------------------------------------------
###2003 - Kheshgi (figure 7)###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "2003 - Kheshgi")
df = df[-1,c(4:5,7:8,10:11,13:14)]
colnames(df) = c('year','dT','year_high', 'dT_high','year_low','dT_low','year_CO2','CO2')
df = data.frame(sapply(df, as.numeric))

xmin = ceiling(as.numeric(min(df$year, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year, na.rm = TRUE))) #Find latest year, rounding down
xmin_high = ceiling(as.numeric(min(df$year_high, na.rm = TRUE))) #Find earliest year, rounding up
xmax_high = floor(as.numeric(max(df$year_high, na.rm = TRUE))) #Find latest year, rounding down
xmin_low = ceiling(as.numeric(min(df$year_low, na.rm = TRUE))) #Find earliest year, rounding up
xmax_low = floor(as.numeric(max(df$year_low, na.rm = TRUE))) #Find latest year, rounding down

interp = data.frame(approx(df$year, df$dT, xout = seq(xmin,xmax,1)))
interp_high = data.frame(approx(df$year_high, df$dT_high, xout = seq(xmin_high,xmax_high,1)))
interp_low = data.frame(approx(df$year_low, df$dT_low, xout = seq(xmin_low,xmax_low,1)))

ind_high = match(interp$x, interp_high$x)
interp$high = interp_high[ind_high,2]
ind_low = match(interp$x, interp_low$x)
interp$low = interp_low[ind_low,2]

#Construct error bars by calculating temperature differences between average scenario and low/high values.
interp$sd_high = interp$high - interp$y
interp$sd_low = interp$y - interp$low
interp = interp[,-c(3:4)]
colnames(interp) = c('year','dT','sd_high','sd_low')

dfb = interp

#Interpolate CO2 values then match to years corresponding to dT values
xmin = ceiling(as.numeric(min(df$year_CO2, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year_CO2, na.rm = TRUE))) #Find latest year, rounding down
interp = data.frame(approx(df$year_CO2, df$CO2, xout = seq(xmin,xmax,1)))
ind_CO2 = match(dfb$year, as.numeric(interp$x))
dfb$CO2 = as.numeric(interp[ind_CO2,2])

dfb$CO2scen = 'nominal'
dfb$cat = '2003 Kheshgi (fig. 7)'
df = dfb
#--------------------------------------------------------------------------------------------------------------------
proj = rbind(proj, df)
startyear = 2003
startyears = rbind(startyears, data.frame(cat = df$cat[1], start = startyear))

#Normally we calculate mean value of pre-industrial temp range over some preindustrial period. Here, no preindustrial temp data plotted, however section 3 explains that temperatures are anomalies "from 1856 through 1875". We therefore zero around that range. Because this is an anomalous case in which the left cutoff of the figure (1990) is after the year at which dT is zeroed, we will later offset the historical data so that it is zeroed around 1856-75. Offsets here are in this case meaningless; as placeholders, simply put 1990, 1990, and zero.
offsets_temp2 = data.frame(cat = df$cat[1], Tpreind1 = 1990, Tpreind2 = 1990, offsets_temp = 0)
offsets = rbind(offsets, offsets_temp2)
#--------------------------------------------------------------------------------------------------------------------
###2003 - Kheshgi (figure 8)###
df = read_excel(paste(dir,'RawData_projected.xlsx',sep=''), sheet = "2003 - Kheshgi")
df = df[-1,c(17:18,20:21,23:24,26:27)]
colnames(df) = c('year','dT','year_high', 'dT_high','year_low','dT_low','year_CO2','CO2')
df = data.frame(sapply(df, as.numeric))

xmin = ceiling(as.numeric(min(df$year, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year, na.rm = TRUE))) #Find latest year, rounding down
xmin_high = ceiling(as.numeric(min(df$year_high, na.rm = TRUE))) #Find earliest year, rounding up
xmax_high = floor(as.numeric(max(df$year_high, na.rm = TRUE))) #Find latest year, rounding down
xmin_low = ceiling(as.numeric(min(df$year_low, na.rm = TRUE))) #Find earliest year, rounding up
xmax_low = floor(as.numeric(max(df$year_low, na.rm = TRUE))) #Find latest year, rounding down

interp = data.frame(approx(df$year, df$dT, xout = seq(xmin,xmax,1)))
interp_high = data.frame(approx(df$year_high, df$dT_high, xout = seq(xmin_high,xmax_high,1)))
interp_low = data.frame(approx(df$year_low, df$dT_low, xout = seq(xmin_low,xmax_low,1)))

ind_high = match(interp$x, interp_high$x)
interp$high = interp_high[ind_high,2]
ind_low = match(interp$x, interp_low$x)
interp$low = interp_low[ind_low,2]

#Create error bars by calculating temperature differences between average scenario and low/high values.
interp$sd_high = interp$high - interp$y
interp$sd_low = interp$y - interp$low
interp = interp[,-c(3:4)]
colnames(interp) = c('year','dT','sd_high','sd_low')

dfb = interp

#Interpolate CO2 values then match to years corresponding to dT values
xmin = ceiling(as.numeric(min(df$year_CO2, na.rm = TRUE))) #Find earliest year, rounding up
xmax = floor(as.numeric(max(df$year_CO2, na.rm = TRUE))) #Find latest year, rounding down
interp = data.frame(approx(df$year_CO2, df$CO2, xout = seq(xmin,xmax,1)))
ind_CO2 = match(dfb$year, as.numeric(interp$x))
dfb$CO2 = as.numeric(interp[ind_CO2,2])

dfb$CO2scen = 'nominal'
dfb$cat = '2003 Kheshgi (fig. 8)'
df = dfb
#--------------------------------------------------------------------------------------------------------------------
proj = rbind(proj, df)
startyear = 2003
startyears = rbind(startyears, data.frame(cat = df$cat[1], start = startyear))

#Normally we calculate mean value of pre-industrial temp range over some preindustrial period. Here, no preindustrial temp data plotted, however section 3 explains that temperatures are anomalies "from 1856 through 1875". Therefore zero around that range. Because this is an anomalous case in which the left cutoff of the figure (1990) is after the year at which dT is zeroed, we will later offset the historical data so that it is zeroed around 1856-75. Offsets here are in this case meaningless; as placeholders, simply put 1990, 1990, and zero.
offsets_temp2 = data.frame(cat = df$cat[1], Tpreind1 = 1990, Tpreind2 = 1990, offsets_temp = 0)
offsets = rbind(offsets, offsets_temp2)
#--------------------------------------------------------------------------------------------------------------------
#SAVE ALL PROJECTION DATA
dir = 'Data/'
fname = 'projections.rda'
save(proj,startyears, offsets, file = paste(dir,fname,sep=''))
###############################################################################################################################################################################
#4. MAIN CODE: Load Historical data
dir = 'Raw data/'
offsets = data.frame()
#--------------------------------------------------------------------------------------------------------------------
###Hadley HadCRUT4###
df = read_excel(paste(dir,'RawData_historical.xlsx',sep=''), sheet = "Hadley HadCRUT4")

df = df[,-c(3:ncol(df))]
df$sd_high = as.numeric(0)
df$sd_low = as.numeric(0)
df$cat = 'Historical (Hadley HadCRUT4)'
colnames(df) = c('year','dT','sd_high','sd_low','cat')
df$dT = df$dT - as.numeric(df[df$year == 1979,2]) #Zero data to 1979 to be consistent with other datasets.
#--------------------------------------------------------------------------------------------------------------------
hist = rbind(hist, df)
#--------------------------------------------------------------------------------------------------------------------
###NOAA GlobalTemp###
df = read_excel(paste(dir,'RawData_historical.xlsx',sep=''), sheet = "NOAA GlobalTemp")

df = df[,-c(3:ncol(df))]
df$sd_high = as.numeric(0)
df$sd_low = as.numeric(0)
df$cat = 'Historical (NOAA GlobalTemp)'
colnames(df) = c('year','dT','sd_high','sd_low','cat')
df$dT = df$dT - as.numeric(df[df$year == 1979,2]) #Zero data to 1979 to be consistent with other datasets.
#--------------------------------------------------------------------------------------------------------------------
hist = rbind(hist, df)
#--------------------------------------------------------------------------------------------------------------------
###NASA GISTEMP###
df = read_excel(paste(dir,'RawData_historical.xlsx',sep=''), sheet = "NASA GISTEMP")

df = df[-c(1,nrow(df)),c(1,14)] #NB Remove final row because it is a blank to-be-filled 2020 row
df$sd_high = as.numeric(0)
df$sd_low = as.numeric(0)
df$cat = 'Historical (NASA GISTEMP)'
colnames(df) = c('year','dT','sd_high','sd_low','cat')
df$dT = as.numeric(df$dT) - as.numeric(df[df$year == 1979,2]) #Zero data to 1979 to be consistent with other datasets.
#--------------------------------------------------------------------------------------------------------------------
hist = rbind(hist, df)
#--------------------------------------------------------------------------------------------------------------------
###Berkeley Earth###
df = read_excel(paste(dir,'RawData_historical.xlsx',sep=''), sheet = "Berkeley Earth")

df = df[-c(1:47),-c(3:ncol(df))]
df$sd_high = as.numeric(0)
df$sd_low = as.numeric(0)
df$cat = 'Historical (Berkeley Earth)'
colnames(df) = c('year','dT','sd_high','sd_low','cat')
df$dT = as.numeric(df$dT) - as.numeric(df[df$year == 1979,2]) #Zero data to 1979 to be consistent with other datasets.
#--------------------------------------------------------------------------------------------------------------------
hist = rbind(hist, df)
#--------------------------------------------------------------------------------------------------------------------
###Cowtan & Way###
df = read_excel(paste(dir,'RawData_historical.xlsx',sep=''), sheet = "Cowtan & Way")

df = df[,-c(3:ncol(df))]
df$sd_high = as.numeric(0)
df$sd_low = as.numeric(0)
df$cat = 'Historical (Cowtan & Way)'
colnames(df) = c('year','dT','sd_high','sd_low','cat')
df$dT = df$dT - as.numeric(df[df$year == 1979,2]) #Zero data to 1979 to be consistent with other datasets.
#--------------------------------------------------------------------------------------------------------------------
hist = rbind(hist, df)
#--------------------------------------------------------------------------------------------------------------------
#SAVE ALL HISTORICAL DATA
dir = 'Data/'
fname = 'historical.rda'
save(hist, file = paste(dir,fname,sep=''))
###############################################################################################################################################################################